Perform weighted gene correlation network analysis as originally described by Horvath et al. on delta (postvax/baseline) transcriptomic data from the KSPZV1 clinical trial.
References:
https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/ https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/JMiller/Tutorial%20document.pdf
Reviewer requested that we provide scatter plots for the correlations between modules and phenotypes. Review of the original code revealed that using Pearson’s or Spearman’s correlation to assess the relationship between modules and binary traits was not appropriate. Thus, for binary traits, we performed limma (empirical Bayes moderated t test) and presented differences as beeswarm plots with mean and 95%CI. For continuous variables, we performed Spearman’s correlation.
library(edgeR)
library(readxl)
library(EDASeq)
library(Biobase)
library(WGCNA)
library(ape)
library(Cairo)
library(CorLevelPlot)
library(tidyverse)
library(igraph)
library(remotes)
library(fgsea)
library(data.table)
library(ggplot2)
library(viridis)
library(ggpubr)
library(googledrive)
allowWGCNAThreads()
## Allowing multi-threading with up to 10 threads.
myCor <- "cor"
power <- 11.5
myMergeCutHeight <- 0.05
myDeepSplit <- 2
minModSize <- 20
enforceMMS <- FALSE
cor.pval <- 0.05
#local file: "PfSPZ_cpm_ExpressionSet_230x23768_allGroups_bothBatches_delta_rmBatchFX_06082021_TMT.rds"
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("17xx0YaggLiyWdJc9rqA1nkPSE7dj05p0"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
dim(x)
## Features Samples
## 23768 230
WGCNA_matrix <- t(exprs(x))
blockSize(ncol(WGCNA_matrix), rectangularBlocks = TRUE, maxMemoryAllocation = 4^31)
## [1] 23768
par(mfrow=c(1,1))
plotClusterTreeSamples(datExpr=WGCNA_matrix)
## NULL
powers <- seq(4,20,by=0.5)
sft <- pickSoftThreshold(WGCNA_matrix, powerVector = powers, corFnc = myCor, verbose = 5, networkType ="signed", blockSize = ncol(WGCNA_matrix))
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 23768 of 23768
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 4.0 0.052 -2.48 0.931 1840.00 1820.00 2550
## 2 4.5 0.099 -2.69 0.920 1380.00 1360.00 2040
## 3 5.0 0.176 -2.84 0.925 1040.00 1020.00 1650
## 4 5.5 0.264 -2.90 0.934 789.00 771.00 1360
## 5 6.0 0.376 -3.07 0.949 604.00 585.00 1140
## 6 6.5 0.480 -3.15 0.961 466.00 446.00 959
## 7 7.0 0.565 -3.16 0.970 362.00 342.00 816
## 8 7.5 0.622 -3.11 0.970 283.00 264.00 699
## 9 8.0 0.658 -3.07 0.963 223.00 204.00 603
## 10 8.5 0.676 -3.03 0.950 177.00 159.00 523
## 11 9.0 0.702 -2.90 0.946 141.00 124.00 456
## 12 9.5 0.716 -2.79 0.936 114.00 96.90 399
## 13 10.0 0.743 -2.64 0.935 92.10 76.20 351
## 14 10.5 0.776 -2.47 0.941 75.20 60.20 310
## 15 11.0 0.807 -2.32 0.947 61.70 47.70 275
## 16 11.5 0.862 -2.15 0.972 51.00 38.00 244
## 17 12.0 0.866 -2.16 0.970 42.50 30.30 227
## 18 12.5 0.878 -2.17 0.975 35.60 24.30 212
## 19 13.0 0.880 -2.18 0.975 30.00 19.60 200
## 20 13.5 0.888 -2.17 0.978 25.40 15.80 189
## 21 14.0 0.904 -2.14 0.986 21.70 12.80 178
## 22 14.5 0.912 -2.12 0.988 18.60 10.30 170
## 23 15.0 0.916 -2.11 0.988 16.00 8.43 162
## 24 15.5 0.923 -2.08 0.989 13.80 6.89 155
## 25 16.0 0.930 -2.05 0.991 12.00 5.63 149
## 26 16.5 0.934 -2.03 0.990 10.50 4.62 143
## 27 17.0 0.936 -2.01 0.987 9.25 3.81 138
## 28 17.5 0.940 -1.99 0.987 8.16 3.14 132
## 29 18.0 0.945 -1.96 0.988 7.22 2.59 128
## 30 18.5 0.949 -1.94 0.987 6.42 2.14 123
## 31 19.0 0.950 -1.92 0.986 5.73 1.77 119
## 32 19.5 0.955 -1.89 0.986 5.13 1.47 115
## 33 20.0 0.958 -1.87 0.986 4.61 1.23 111
sft$powerEstimate
## [1] 11.5
par(mfrow=c(1,1))
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab='Soft Threshold (power)',ylab='Scale Free Topology Model Fit,signed R²',
type='n', main = paste('Scale independence'));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=1,col='red'); abline(h=0.90,col='red')
net <- blockwiseModules(WGCNA_matrix,
power=power,
deepSplit=myDeepSplit,
minModuleSize=minModSize,
TOMType="none",
mergeCutHeight=myMergeCutHeight,
TOMDenom="mean",
detectCutHeight=0.995,
corType="pearson",
networkType="signed",
pamStage=TRUE,
pamRespectsDendro=TRUE,
reassignThresh=0.05,
verbose=5,
saveTOMs=FALSE,
maxBlockSize=ncol(WGCNA_matrix),
nThreads = 0)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will use 10 parallel threads.
## Fraction of slow calculations: 0.000000
##
## ....clustering..
## ....detecting modules..
## ..done.
## ....calculating module eigengenes..
## moduleEigengenes : Working on ME for module 1
## moduleEigengenes : Working on ME for module 2
## moduleEigengenes : Working on ME for module 3
## moduleEigengenes : Working on ME for module 4
## moduleEigengenes : Working on ME for module 5
## moduleEigengenes : Working on ME for module 6
## moduleEigengenes : Working on ME for module 7
## moduleEigengenes : Working on ME for module 8
## moduleEigengenes : Working on ME for module 9
## moduleEigengenes : Working on ME for module 10
## moduleEigengenes : Working on ME for module 11
## moduleEigengenes : Working on ME for module 12
## moduleEigengenes : Working on ME for module 13
## moduleEigengenes : Working on ME for module 14
## moduleEigengenes : Working on ME for module 15
## moduleEigengenes : Working on ME for module 16
## moduleEigengenes : Working on ME for module 17
## moduleEigengenes : Working on ME for module 18
## moduleEigengenes : Working on ME for module 19
## moduleEigengenes : Working on ME for module 20
## moduleEigengenes : Working on ME for module 21
## moduleEigengenes : Working on ME for module 22
## moduleEigengenes : Working on ME for module 23
## moduleEigengenes : Working on ME for module 24
## moduleEigengenes : Working on ME for module 25
## moduleEigengenes : Working on ME for module 26
## moduleEigengenes : Working on ME for module 27
## moduleEigengenes : Working on ME for module 28
## moduleEigengenes : Working on ME for module 29
## moduleEigengenes : Working on ME for module 30
## moduleEigengenes : Working on ME for module 31
## moduleEigengenes : Working on ME for module 32
## moduleEigengenes : Working on ME for module 33
## moduleEigengenes : Working on ME for module 34
## moduleEigengenes : Working on ME for module 35
## moduleEigengenes : Working on ME for module 36
## moduleEigengenes : Working on ME for module 37
## moduleEigengenes : Working on ME for module 38
## moduleEigengenes : Working on ME for module 39
## moduleEigengenes : Working on ME for module 40
## moduleEigengenes : Working on ME for module 41
## moduleEigengenes : Working on ME for module 42
## moduleEigengenes : Working on ME for module 43
## moduleEigengenes : Working on ME for module 44
## moduleEigengenes : Working on ME for module 45
## moduleEigengenes : Working on ME for module 46
## moduleEigengenes : Working on ME for module 47
## moduleEigengenes : Working on ME for module 48
## moduleEigengenes : Working on ME for module 49
## moduleEigengenes : Working on ME for module 50
## moduleEigengenes : Working on ME for module 51
## moduleEigengenes : Working on ME for module 52
## moduleEigengenes : Working on ME for module 53
## moduleEigengenes : Working on ME for module 54
## moduleEigengenes : Working on ME for module 55
## moduleEigengenes : Working on ME for module 56
## moduleEigengenes : Working on ME for module 57
## moduleEigengenes : Working on ME for module 58
## moduleEigengenes : Working on ME for module 59
## moduleEigengenes : Working on ME for module 60
## moduleEigengenes : Working on ME for module 61
## moduleEigengenes : Working on ME for module 62
## moduleEigengenes : Working on ME for module 63
## moduleEigengenes : Working on ME for module 64
## moduleEigengenes : Working on ME for module 65
## moduleEigengenes : Working on ME for module 66
## moduleEigengenes : Working on ME for module 67
## moduleEigengenes : Working on ME for module 68
## moduleEigengenes : Working on ME for module 69
## moduleEigengenes : Working on ME for module 70
## moduleEigengenes : Working on ME for module 71
## moduleEigengenes : Working on ME for module 72
## moduleEigengenes : Working on ME for module 73
## moduleEigengenes : Working on ME for module 74
## moduleEigengenes : Working on ME for module 75
## moduleEigengenes : Working on ME for module 76
## moduleEigengenes : Working on ME for module 77
## moduleEigengenes : Working on ME for module 78
## moduleEigengenes : Working on ME for module 79
## moduleEigengenes : Working on ME for module 80
## moduleEigengenes : Working on ME for module 81
## moduleEigengenes : Working on ME for module 82
## moduleEigengenes : Working on ME for module 83
## moduleEigengenes : Working on ME for module 84
## moduleEigengenes : Working on ME for module 85
## moduleEigengenes : Working on ME for module 86
## moduleEigengenes : Working on ME for module 87
## moduleEigengenes : Working on ME for module 88
## moduleEigengenes : Working on ME for module 89
## moduleEigengenes : Working on ME for module 90
## moduleEigengenes : Working on ME for module 91
## moduleEigengenes : Working on ME for module 92
## moduleEigengenes : Working on ME for module 93
## moduleEigengenes : Working on ME for module 94
## moduleEigengenes : Working on ME for module 95
## moduleEigengenes : Working on ME for module 96
## moduleEigengenes : Working on ME for module 97
## moduleEigengenes : Working on ME for module 98
## moduleEigengenes : Working on ME for module 99
## moduleEigengenes : Working on ME for module 100
## moduleEigengenes : Working on ME for module 101
## moduleEigengenes : Working on ME for module 102
## moduleEigengenes : Working on ME for module 103
## moduleEigengenes : Working on ME for module 104
## moduleEigengenes : Working on ME for module 105
## moduleEigengenes : Working on ME for module 106
## moduleEigengenes : Working on ME for module 107
## moduleEigengenes : Working on ME for module 108
## moduleEigengenes : Working on ME for module 109
## moduleEigengenes : Working on ME for module 110
## moduleEigengenes : Working on ME for module 111
## moduleEigengenes : Working on ME for module 112
## moduleEigengenes : Working on ME for module 113
## moduleEigengenes : Working on ME for module 114
## moduleEigengenes : Working on ME for module 115
## moduleEigengenes : Working on ME for module 116
## moduleEigengenes : Working on ME for module 117
## moduleEigengenes : Working on ME for module 118
## moduleEigengenes : Working on ME for module 119
## moduleEigengenes : Working on ME for module 120
## moduleEigengenes : Working on ME for module 121
## moduleEigengenes : Working on ME for module 122
## ....checking kME in modules..
## ..removing 32 genes from module 2 because their KME is too low.
## ..removing 18 genes from module 3 because their KME is too low.
## ..removing 1 genes from module 4 because their KME is too low.
## ..removing 1 genes from module 8 because their KME is too low.
## ..removing 2 genes from module 18 because their KME is too low.
## ..removing 1 genes from module 20 because their KME is too low.
## ..removing 1 genes from module 21 because their KME is too low.
## ..removing 3 genes from module 23 because their KME is too low.
## ..removing 2 genes from module 33 because their KME is too low.
## ..removing 2 genes from module 49 because their KME is too low.
## ..removing 1 genes from module 50 because their KME is too low.
## ..reassigning 15 genes from module 1 to modules with higher KME.
## ..reassigning 216 genes from module 2 to modules with higher KME.
## ..reassigning 131 genes from module 3 to modules with higher KME.
## ..reassigning 62 genes from module 4 to modules with higher KME.
## ..reassigning 105 genes from module 5 to modules with higher KME.
## ..reassigning 86 genes from module 6 to modules with higher KME.
## ..reassigning 47 genes from module 7 to modules with higher KME.
## ..reassigning 47 genes from module 8 to modules with higher KME.
## ..reassigning 55 genes from module 9 to modules with higher KME.
## ..reassigning 37 genes from module 10 to modules with higher KME.
## ..reassigning 14 genes from module 11 to modules with higher KME.
## ..reassigning 54 genes from module 12 to modules with higher KME.
## ..reassigning 28 genes from module 13 to modules with higher KME.
## ..reassigning 67 genes from module 14 to modules with higher KME.
## ..reassigning 36 genes from module 15 to modules with higher KME.
## ..reassigning 43 genes from module 16 to modules with higher KME.
## ..reassigning 35 genes from module 17 to modules with higher KME.
## ..reassigning 29 genes from module 18 to modules with higher KME.
## ..reassigning 35 genes from module 19 to modules with higher KME.
## ..reassigning 48 genes from module 20 to modules with higher KME.
## ..reassigning 30 genes from module 21 to modules with higher KME.
## ..reassigning 21 genes from module 22 to modules with higher KME.
## ..reassigning 41 genes from module 23 to modules with higher KME.
## ..reassigning 20 genes from module 24 to modules with higher KME.
## ..reassigning 47 genes from module 25 to modules with higher KME.
## ..reassigning 21 genes from module 26 to modules with higher KME.
## ..reassigning 3 genes from module 27 to modules with higher KME.
## ..reassigning 51 genes from module 28 to modules with higher KME.
## ..reassigning 22 genes from module 29 to modules with higher KME.
## ..reassigning 18 genes from module 30 to modules with higher KME.
## ..reassigning 29 genes from module 31 to modules with higher KME.
## ..reassigning 35 genes from module 32 to modules with higher KME.
## ..reassigning 37 genes from module 33 to modules with higher KME.
## ..reassigning 30 genes from module 34 to modules with higher KME.
## ..reassigning 35 genes from module 35 to modules with higher KME.
## ..reassigning 23 genes from module 36 to modules with higher KME.
## ..reassigning 39 genes from module 37 to modules with higher KME.
## ..reassigning 40 genes from module 38 to modules with higher KME.
## ..reassigning 16 genes from module 39 to modules with higher KME.
## ..reassigning 39 genes from module 40 to modules with higher KME.
## ..reassigning 22 genes from module 41 to modules with higher KME.
## ..reassigning 23 genes from module 42 to modules with higher KME.
## ..reassigning 13 genes from module 43 to modules with higher KME.
## ..reassigning 17 genes from module 44 to modules with higher KME.
## ..reassigning 28 genes from module 45 to modules with higher KME.
## ..reassigning 13 genes from module 46 to modules with higher KME.
## ..reassigning 10 genes from module 47 to modules with higher KME.
## ..reassigning 7 genes from module 48 to modules with higher KME.
## ..reassigning 8 genes from module 49 to modules with higher KME.
## ..reassigning 37 genes from module 50 to modules with higher KME.
## ..reassigning 10 genes from module 51 to modules with higher KME.
## ..reassigning 24 genes from module 52 to modules with higher KME.
## ..reassigning 23 genes from module 53 to modules with higher KME.
## ..reassigning 28 genes from module 54 to modules with higher KME.
## ..reassigning 28 genes from module 55 to modules with higher KME.
## ..reassigning 19 genes from module 56 to modules with higher KME.
## ..reassigning 17 genes from module 57 to modules with higher KME.
## ..reassigning 12 genes from module 58 to modules with higher KME.
## ..reassigning 15 genes from module 59 to modules with higher KME.
## ..reassigning 22 genes from module 60 to modules with higher KME.
## ..reassigning 13 genes from module 61 to modules with higher KME.
## ..reassigning 20 genes from module 62 to modules with higher KME.
## ..reassigning 16 genes from module 63 to modules with higher KME.
## ..reassigning 4 genes from module 64 to modules with higher KME.
## ..reassigning 17 genes from module 65 to modules with higher KME.
## ..reassigning 7 genes from module 66 to modules with higher KME.
## ..reassigning 14 genes from module 67 to modules with higher KME.
## ..reassigning 22 genes from module 68 to modules with higher KME.
## ..reassigning 16 genes from module 69 to modules with higher KME.
## ..reassigning 18 genes from module 70 to modules with higher KME.
## ..reassigning 21 genes from module 71 to modules with higher KME.
## ..reassigning 12 genes from module 72 to modules with higher KME.
## ..reassigning 12 genes from module 73 to modules with higher KME.
## ..reassigning 15 genes from module 74 to modules with higher KME.
## ..reassigning 17 genes from module 75 to modules with higher KME.
## ..reassigning 6 genes from module 76 to modules with higher KME.
## ..reassigning 6 genes from module 77 to modules with higher KME.
## ..reassigning 13 genes from module 78 to modules with higher KME.
## ..reassigning 10 genes from module 79 to modules with higher KME.
## ..reassigning 6 genes from module 80 to modules with higher KME.
## ..reassigning 16 genes from module 81 to modules with higher KME.
## ..reassigning 6 genes from module 82 to modules with higher KME.
## ..reassigning 13 genes from module 83 to modules with higher KME.
## ..reassigning 8 genes from module 84 to modules with higher KME.
## ..reassigning 3 genes from module 85 to modules with higher KME.
## ..reassigning 4 genes from module 86 to modules with higher KME.
## ..reassigning 3 genes from module 87 to modules with higher KME.
## ..reassigning 6 genes from module 88 to modules with higher KME.
## ..reassigning 8 genes from module 89 to modules with higher KME.
## ..reassigning 10 genes from module 90 to modules with higher KME.
## ..reassigning 7 genes from module 91 to modules with higher KME.
## ..reassigning 11 genes from module 92 to modules with higher KME.
## ..reassigning 14 genes from module 93 to modules with higher KME.
## ..reassigning 9 genes from module 94 to modules with higher KME.
## ..reassigning 6 genes from module 95 to modules with higher KME.
## ..reassigning 2 genes from module 96 to modules with higher KME.
## ..reassigning 6 genes from module 97 to modules with higher KME.
## ..reassigning 11 genes from module 98 to modules with higher KME.
## ..reassigning 4 genes from module 99 to modules with higher KME.
## ..reassigning 6 genes from module 100 to modules with higher KME.
## ..reassigning 8 genes from module 101 to modules with higher KME.
## ..reassigning 1 genes from module 102 to modules with higher KME.
## ..reassigning 5 genes from module 103 to modules with higher KME.
## ..reassigning 7 genes from module 104 to modules with higher KME.
## ..reassigning 1 genes from module 105 to modules with higher KME.
## ..reassigning 2 genes from module 106 to modules with higher KME.
## ..reassigning 9 genes from module 107 to modules with higher KME.
## ..reassigning 5 genes from module 108 to modules with higher KME.
## ..reassigning 4 genes from module 109 to modules with higher KME.
## ..reassigning 6 genes from module 110 to modules with higher KME.
## ..reassigning 3 genes from module 111 to modules with higher KME.
## ..reassigning 5 genes from module 112 to modules with higher KME.
## ..reassigning 1 genes from module 113 to modules with higher KME.
## ..reassigning 2 genes from module 114 to modules with higher KME.
## ..reassigning 4 genes from module 115 to modules with higher KME.
## ..reassigning 3 genes from module 116 to modules with higher KME.
## ..reassigning 6 genes from module 117 to modules with higher KME.
## ..reassigning 1 genes from module 118 to modules with higher KME.
## ..reassigning 2 genes from module 119 to modules with higher KME.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.05
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 122 module eigengenes in given set.
## Calculating new MEs...
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 122 module eigengenes in given set.
# Summary Module Table
nModules <- length(table(net$colors))-1
modules <- cbind(colnames(as.matrix(table(net$colors))),table(net$colors))
orderedModules <- cbind(Mnum=paste("M",seq(1:nModules),sep=""),Color=labels2colors(c(1:nModules)))
modules <- modules[match(as.character(orderedModules[,2]),rownames(modules)),]
tmpMEs <- MEs <- net$MEs
colnames(tmpMEs) <- paste("ME",colnames(MEs),sep="")
kMEdat <- signedKME(WGCNA_matrix, tmpMEs, corFnc=myCor) #calculate (signed) eigengene-based connectivity, also known as module membership
WGCNA_dat <- cbind(fData(x)$GeneSymbol, colnames(WGCNA_matrix),net$colors,kMEdat) %>%
as.data.frame() %>%
dplyr::rename(GeneSymbol = "fData(x)$GeneSymbol") %>%
dplyr::rename(ENSEMBLID = "colnames(WGCNA_matrix)") %>%
dplyr::rename(ModuleColors = "net$colors")
# Define numbers of genes and samples
nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)
datvar <- pData(x) %>%
dplyr::select(PATID, SEQBATCH, site, SEX, age.vax1, mal.vax.1, treat, mal.atp.3, tte.mal.atp.6, mal.dvax, mal.dvax.tot, pfcsp_pre, pfcsp_post, log2FC_CSPAb) %>%
mutate('Gender (female)' = factor(ifelse(SEX == "F", 1, 0))) %>%
mutate('Site (Siaya)' = factor(ifelse(site == "Siaya", 1, 0))) %>%
dplyr::rename('Pf infection at first vax' = "mal.vax.1") %>%
mutate(pfcsp_pre = ifelse(is.na(pfcsp_pre), median(pfcsp_pre, na.rm=TRUE), pfcsp_pre)) %>%
mutate(pfcsp_post = ifelse(is.na(pfcsp_post), median(pfcsp_post, na.rm=TRUE), pfcsp_post)) %>%
mutate(mal.dvax.tot = ifelse(is.na(mal.dvax.tot), median(mal.dvax.tot, na.rm=TRUE), mal.dvax.tot)) %>%
mutate('pre-vax anti-CSP IgG' = log10(pfcsp_pre+1)) %>%
mutate('post-vax anti-CSP IgG' = log10(pfcsp_post+1)) %>%
mutate('log2FC anti-CSP IgG' = log2((pfcsp_post+1)/(pfcsp_pre+1))) %>%
mutate('1.8 x 10^6 PfSPZ' = factor(ifelse(treat == "1.8 x 10^6 PfSPZ", 1, 0))) %>%
mutate('parasitemic events during vax period' = mal.dvax.tot) %>%
mutate('uninfected, 3 months' = factor(ifelse(mal.atp.3 == 0, 1, 0))) %>%
dplyr::rename('days to first parasitemia' = "tte.mal.atp.6") %>%
mutate(Age = age.vax1) %>%
dplyr::select(PATID, Age, site, SEX, treat, 'pre-vax anti-CSP IgG', 'parasitemic events during vax period', 'uninfected, 3 months', 'days to first parasitemia', 'log2FC anti-CSP IgG') %>% #delta
as_tibble() %>%
column_to_rownames(var = "PATID")
modTraitCor <- cor(orderMEs(net$MEs), datvar, use = "p")
modTraitP <- corPvalueStudent(modTraitCor, nSamples)
#Since we have a moderately large number of modules and traits, a suitable graphical representation will help in reading the table. We color code each association by the correlation value: Will display correlations and their p-values
#Select out only modules that have P<0.05 in Protection
modTraitP.temp <- modTraitP %>%
as.data.frame() %>%
rownames_to_column(var = "Module") %>%
filter(.$'uninfected, 3 months' < cor.pval | .$'days to first parasitemia' < cor.pval)
modTraitCor.select <- modTraitCor[modTraitP.temp$Module,]
modTraitP.select <- modTraitP[modTraitP.temp$Module,]
textMatrix <- paste(signif(modTraitCor.select, 2), "\n(P=",
signif(modTraitP.select, 1), ")", sep = "")
dim(textMatrix) <- dim(modTraitCor.select)
topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")
par(mar = c(11, 9, 1, 1))
labeledHeatmap(Matrix = modTraitCor.select, xLabels = names(datvar),
yLabels = rownames(modTraitCor.select), ySymbols = rownames(modTraitCor.select),
colorLabels =FALSE,colors=blueWhiteRed(100),textMatrix=textMatrix,
setStdMargins = FALSE, zlim = c(-1,1),
main = paste("Module-trait relationships"),xLabelsAngle = 45)
myColors <- gsub("ME", "", rownames(modTraitCor.select))
topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")
mytopmodules <- topmodules[myColors] %>%
as.data.frame() %>%
dplyr::rename(EnsemblID = ".") %>%
rownames_to_column("module_label") %>%
as_tibble() %>%
left_join(., fData(x) %>%
dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
devtools::source_url("https://github.com/jtlovell/limmaDE2/blob/master/R/wgcna2igraph.R?raw=TRUE")
graph <- wgcna2igraph(net = net, WGCNA_matrix, modules2plot = myColors,
colors2plot = myColors,
kME.threshold = 0.5, adjacency.threshold = 0.1,
adj.power = power, verbose = T,
node.size = 1, frame.color = NA, node.color = scales::muted("red"),
edge.alpha = .7, edge.width = 0.5)
## using KME values to find genes with high module membership
## subsetting to modules: lightgreen, paleturquoise, lightblue4, coral4, salmon2, brown4, midnightblue
## culling edges by adjacency
## removing unconnected nodes
## coverting to igraph format
## returning a network with 371 nodes and 10861 edges
hubscores <- hub_score(graph, scale = TRUE, weights = NULL,
options = arpack_defaults)
| module_label | EnsemblID | GeneSymbol |
|---|---|---|
| lightgreen | ENSG00000012124 | CD22 |
| paleturquoise | ENSG00000186265 | BTLA |
| lightblue4 | ENSG00000255641 | NKG2-E |
| coral4 | ENSG00000012817 | KDM5D |
| salmon2 | ENSG00000153827 | TRIP12 |
| brown4 | ENSG00000008952 | SEC62 |
| midnightblue | ENSG00000101782 | RIOK3 |
Network graphs of significant modules containing nodes (red dots) and edges (lines) meeting minimum thresholds. Correlations between nodes in different modules are shown as black edges.
This was requested by a reviewer.
First find significant Spearman correlations (p<0.05)
all_modules <- topmodules %>%
as.data.frame() %>%
dplyr::rename(EnsemblID = ".") %>%
rownames_to_column("module_color") %>%
as_tibble() %>%
left_join(., fData(x) %>%
dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
all_MEs <- MEs %>% #MEs[,names(lapply(WGCNA_dat_select, nrow))] %>%
rownames_to_column(var = "subject_id") %>%
left_join(., datvar %>%
rownames_to_column(var = "subject_id"),
by = "subject_id") %>%
pivot_longer(., cols = c("pre-vax anti-CSP IgG", "parasitemic events during vax period", "days to first parasitemia", "log2FC anti-CSP IgG"), names_to = "trait", values_to = "trait_value") %>%
pivot_longer(., cols = contains("ME"), names_to = "module_label", values_to = "module_eigenvalue") %>%
mutate(module_color = gsub("ME", "", module_label)) %>%
left_join(., all_modules,
by = "module_color")
cor_dat_all <- all_MEs %>%
group_by(GeneSymbol, trait) %>%
summarize(rho = cor.test(module_eigenvalue, trait_value, method = "spearman")$estimate,
p = cor.test(module_eigenvalue, trait_value, method = "spearman")$p.value)
## Warning: There were 976 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `rho = cor.test(module_eigenvalue, trait_value, method =
## "spearman")$estimate`.
## ℹ In group 1: `GeneSymbol = "ABCD2"`, `trait = "days to first parasitemia"`.
## Caused by warning in `cor.test.default()`:
## ! Cannot compute exact p-value with ties
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 975 remaining warnings.
## `summarise()` has grouped output by 'GeneSymbol'. You can override using the
## `.groups` argument.
cor_dat_signif <- cor_dat_all %>%
filter(trait == "days to first parasitemia" & p<0.05)
spearman_signif_hub_genes <- cor_dat_signif$GeneSymbol
Prepare data for differential module eigengene analysis
diffME_dat <- orderMEs(net$MEs) %>%
rownames_to_column(var = "subject") %>%
mutate(subject = gsub("\\_0", "", subject)) %>%
right_join(., datvar %>%
dplyr::select(-c(Age, "days to first parasitemia")) %>%
mutate("parasitemic during vax period" = factor(ifelse(.$"parasitemic events during vax period" > 0, "parasitemic", "not_parasitemic"),
levels = c("not_parasitemic","parasitemic"))) %>%
mutate("uninfected, 3 months" = factor(.$"uninfected, 3 months", levels = c(0,1),
labels = c("infected","never infected"))) %>%
mutate("pre-vax anti-CSP IgG" = factor(ifelse(.$"pre-vax anti-CSP IgG" > 0, "present", "absent"),
levels = c("absent","present"))) %>%
mutate("log2FC anti-CSP IgG" = factor(ifelse(.$"log2FC anti-CSP IgG" > 12.5, "high responder", "low responder"),
levels = c("low responder","high responder"))) %>%
dplyr::rename("fold-change anti-CSP IgG" = "log2FC anti-CSP IgG") %>%
rownames_to_column(var = "subject"),
by = "subject") %>%
dplyr::select(-c("parasitemic events during vax period")) %>%
pivot_longer(cols = contains("ME"), names_to = "module", values_to = "MEs") %>%
pivot_longer(cols = c("pre-vax anti-CSP IgG":"parasitemic during vax period"), names_to = "trait", values_to = "trait_cat") %>%
arrange(module, site, treat, trait, trait_cat) %>%
drop_na(trait_cat)
diffME_dat_samplesize <- diffME_dat %>%
group_by(module, treat, trait, trait_cat) %>%
summarize(n=n()) %>%
ungroup()
## `summarise()` has grouped output by 'module', 'treat', 'trait'. You can
## override using the `.groups` argument.
pheno_dat <- diffME_dat %>%
dplyr::select(subject, site, treat, trait, trait_cat) %>%
distinct(subject, site, treat, trait, trait_cat) %>%
pivot_wider(names_from = trait, values_from = trait_cat) %>%
arrange(subject) %>%
column_to_rownames(var = "subject") %>%
droplevels()
feature_dat <- diffME_dat %>%
dplyr::select(module, MEs) %>%
distinct(module) %>%
mutate(module_color = gsub("ME","", module)) %>%
left_join(., topmodules %>%
enframe() %>%
dplyr::rename(EnsemblID = "value",
module_color = "name") %>%
left_join(., fData(x) %>%
dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")) %>%
column_to_rownames(var = "module") %>%
dplyr::rename(hub_gene = "GeneSymbol")
## Joining with `by = join_by(module_color)`
exprs_dat <- orderMEs(net$MEs) %>%
rownames_to_column(var = "subject") %>%
mutate(subject = gsub("\\_0", "", subject)) %>%
arrange(subject) %>%
column_to_rownames(var = "subject") %>%
dplyr::select(rownames(feature_dat)) %>%
t()
#check
ifelse(all(colnames(exprs_dat) == rownames(pheno_dat)) &
all(rownames(exprs_dat) == rownames(feature_dat)),
"Good to go!",
"Check for matching names")
## [1] "Good to go!"
#make expression set
wgcna_eset <- ExpressionSet(exprs_dat, phenoData = AnnotatedDataFrame(pheno_dat), featureData = AnnotatedDataFrame(feature_dat))
#Determine differences between module eigenvalues as binary using limma
#Pf_baseline
pf_during_vax <- factor(wgcna_eset$`parasitemic during vax period`) #encodes a vector as a factor
treatment <- wgcna_eset$treat
site <- factor(wgcna_eset$site)
design <- model.matrix(~0+pf_during_vax+site+treatment)
colnames(design)
## [1] "pf_during_vaxnot_parasitemic" "pf_during_vaxparasitemic"
## [3] "siteWagai" "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("uninfected_during_vax", "infected_during_vax","wagai", "lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(infected_during_vax - uninfected_during_vax,
levels=design)
contrasts
## Contrasts
## Levels infected_during_vax - uninfected_during_vax
## uninfected_during_vax -1
## infected_during_vax 1
## wagai 0
## lowdose 0
## mediumdose 0
## highdose 0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
Pf_during_vax <- topTable(fit2, coef="infected_during_vax - uninfected_during_vax", number = Inf) %>%
rownames_to_column("module") %>%
right_join(., feature_dat %>%
rownames_to_column("module"),
by = "module") %>%
mutate(comparison = "infected_vs_uninfected_during_vax") %>%
dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
head(Pf_during_vax)
## comparison module module_color hub_gene
## 1 infected_vs_uninfected_during_vax MEmidnightblue midnightblue RIOK3
## 2 infected_vs_uninfected_during_vax MEbrown4 brown4 SEC62
## 3 infected_vs_uninfected_during_vax MEdarkseagreen3 darkseagreen3 GHITM
## 4 infected_vs_uninfected_during_vax MEyellow yellow RNF10
## 5 infected_vs_uninfected_during_vax MEsalmon2 salmon2 TRIP12
## 6 infected_vs_uninfected_during_vax MEdarkgrey darkgrey FLNC
## logFC P.Value adj.P.Val
## 1 -0.03001621 0.0006197385 0.07388096
## 2 -0.02838852 0.0012111633 0.07388096
## 3 -0.02682087 0.0022262108 0.09053257
## 4 -0.02529874 0.0039862594 0.10416229
## 5 -0.02509221 0.0042689465 0.10416229
## 6 0.02090366 0.0171855955 0.34944044
#prevax_antiCSPIgG
prevax_antiCSPIgG <- factor(wgcna_eset$`pre-vax anti-CSP IgG`)
design <- model.matrix(~0+prevax_antiCSPIgG+site+treatment)
colnames(design)
## [1] "prevax_antiCSPIgGabsent" "prevax_antiCSPIgGpresent"
## [3] "siteWagai" "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("noCSPAb_first_vax", "CSPAb_first_vax","wagai","lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(CSPAb_first_vax - noCSPAb_first_vax,
levels=design)
contrasts
## Contrasts
## Levels CSPAb_first_vax - noCSPAb_first_vax
## noCSPAb_first_vax -1
## CSPAb_first_vax 1
## wagai 0
## lowdose 0
## mediumdose 0
## highdose 0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
CSPAb_baseline <- topTable(fit2, coef="CSPAb_first_vax - noCSPAb_first_vax", number = Inf) %>%
rownames_to_column("module") %>%
right_join(., feature_dat %>%
rownames_to_column("module"),
by = "module") %>%
mutate(comparison = "present_vs_absent_baseline") %>%
dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
head(CSPAb_baseline)
## comparison module module_color hub_gene logFC
## 1 present_vs_absent_baseline MEchocolate4 chocolate4 SWT1 -0.03090976
## 2 present_vs_absent_baseline MElightyellow lightyellow PRRC2B 0.02811778
## 3 present_vs_absent_baseline MEdeeppink deeppink RSL24D1 -0.02779748
## 4 present_vs_absent_baseline MEmediumpurple2 mediumpurple2 ARHGEF18 0.02731979
## 5 present_vs_absent_baseline MEplum4 plum4 TRAM1 -0.02592426
## 6 present_vs_absent_baseline MEdarkseagreen3 darkseagreen3 GHITM -0.02565887
## P.Value adj.P.Val
## 1 0.0009655604 0.1074472
## 2 0.0026926506 0.1074472
## 3 0.0030121857 0.1074472
## 4 0.0035228597 0.1074472
## 5 0.0055977165 0.1225999
## 6 0.0061032037 0.1225999
#protection
outcome_3mos <- factor(wgcna_eset$`uninfected, 3 months`)
design <- model.matrix(~0+outcome_3mos+site+treatment)
colnames(design)
## [1] "outcome_3mosinfected" "outcome_3mosnever infected"
## [3] "siteWagai" "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("not_protected","protected", "wagai", "lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(protected - not_protected,
levels=design)
contrasts
## Contrasts
## Levels protected - not_protected
## not_protected -1
## protected 1
## wagai 0
## lowdose 0
## mediumdose 0
## highdose 0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
p_vs_np_3mos <- topTable(fit2, coef="protected - not_protected", number = Inf) %>%
rownames_to_column("module") %>%
right_join(., feature_dat %>%
rownames_to_column("module"),
by = "module") %>%
mutate(comparison = "protected_vs_not_protected") %>%
dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
head(p_vs_np_3mos)
## comparison module module_color hub_gene logFC
## 1 protected_vs_not_protected MEbrown4 brown4 SEC62 0.02060079
## 2 protected_vs_not_protected MEmidnightblue midnightblue RIOK3 0.01897438
## 3 protected_vs_not_protected MEcoral4 coral4 KDM5D -0.01675378
## 4 protected_vs_not_protected MEblue2 blue2 ADGRE5 -0.01608296
## 5 protected_vs_not_protected MEsalmon2 salmon2 TRIP12 0.01595809
## 6 protected_vs_not_protected MEmagenta3 magenta3 PRR13 -0.01575198
## P.Value adj.P.Val
## 1 0.02056804 0.8625497
## 2 0.03284857 0.8625497
## 3 0.05980260 0.8625497
## 4 0.07063734 0.8625497
## 5 0.07310500 0.8625497
## 6 0.07689212 0.8625497
#CSP Ab response
FC_CSPAb <- factor(wgcna_eset$`fold-change anti-CSP IgG`)
design <- model.matrix(~0+FC_CSPAb+site+treatment)
colnames(design)
## [1] "FC_CSPAblow responder" "FC_CSPAbhigh responder"
## [3] "siteWagai" "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("low_responder","high_responder","wagai","lowdose","mediumdose","highdose")
fit <- lmFit(exprs_dat, design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(high_responder - low_responder,
levels=design)
contrasts
## Contrasts
## Levels high_responder - low_responder
## low_responder -1
## high_responder 1
## wagai 0
## lowdose 0
## mediumdose 0
## highdose 0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
hi_vs_low_CSP <- topTable(fit2, coef="high_responder - low_responder", number = Inf) %>%
rownames_to_column("module") %>%
right_join(., feature_dat %>%
rownames_to_column("module"),
by = "module") %>%
mutate(comparison = "hi_vs_low_CSP") %>%
dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
head(hi_vs_low_CSP)
## comparison module module_color hub_gene logFC P.Value
## 1 hi_vs_low_CSP MEindianred4 indianred4 IL1RN -0.03855345 0.006462946
## 2 hi_vs_low_CSP MEviolet violet AC003991.3 -0.03741874 0.008128466
## 3 hi_vs_low_CSP MEskyblue skyblue NFAM1 -0.03484879 0.013560050
## 4 hi_vs_low_CSP MEplum3 plum3 LRRC25 -0.03425417 0.015312087
## 5 hi_vs_low_CSP MEpurple purple RNF24 -0.03413091 0.015647248
## 6 hi_vs_low_CSP MEdarkorange2 darkorange2 PTAFR -0.03353148 0.017530385
## adj.P.Val
## 1 0.3564512
## 2 0.3564512
## 3 0.3564512
## 4 0.3564512
## 5 0.3564512
## 6 0.3564512
all_module_trait_dat <- rbind(Pf_during_vax, CSPAb_baseline, hi_vs_low_CSP, p_vs_np_3mos) %>%
arrange(P.Value)
p_vs_np_3mos_select <- p_vs_np_3mos %>%
filter(P.Value<0.05)
all_module_trait_hm_dat <- all_module_trait_dat %>%
filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
dplyr::select(comparison, module_color, hub_gene, logFC) %>%
pivot_wider(names_from = comparison, values_from = logFC) %>%
column_to_rownames(var = "hub_gene")
#lapply(WGCNA_dat_select, nrow)
### Heatamp for WGCNA Limma data
#Look only at modules that were significantly different between P vs NP, which is RIOK3 and SEC62
library(tidyverse)
library(ComplexHeatmap)
## Loading required package: grid
##
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
##
## pattern
## ========================================
## ComplexHeatmap version 2.17.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
p_vs_np_3mos_select <- p_vs_np_3mos %>%
filter(P.Value < 0.05) %>%
dplyr::select(comparison, hub_gene, module_color, logFC, P.Value, adj.P.Val)
Pf_during_vax_select <- Pf_during_vax %>%
filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
dplyr::select(comparison, hub_gene, module_color, logFC, P.Value, adj.P.Val)
CSPAb_baseline_select <- CSPAb_baseline %>%
filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
dplyr::select(comparison, hub_gene, module_color, logFC, P.Value, adj.P.Val)
hi_vs_low_CSP_select <- hi_vs_low_CSP %>%
filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
dplyr::select(comparison, hub_gene, module_color, logFC, P.Value, adj.P.Val)
all_module_trait_binary_hm_dat <- bind_rows(p_vs_np_3mos_select,
Pf_during_vax_select,
CSPAb_baseline_select,
hi_vs_low_CSP_select)
all_module_trait_binary_hm_mat <- all_module_trait_binary_hm_dat %>%
dplyr::select(comparison, hub_gene, logFC) %>%
pivot_wider(names_from = "comparison", values_from = "logFC", id_cols = hub_gene) %>%
column_to_rownames(var = "hub_gene") %>%
as.matrix()
clab <- list(
"hub_gene" = c("RIOK3" = "midnightblue", "SEC62" = "brown4")
)
clab <- rev(clab)
row_ha_df <- all_module_trait_binary_hm_dat %>%
filter(comparison == "protected_vs_not_protected") %>%
dplyr::select(hub_gene)
row_ha <- rowAnnotation(df = row_ha_df, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
simple_anno_size = unit(0.2, "inches"))
paletteLength <- 50
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1*max(abs(all_module_trait_binary_hm_mat)), 0, length.out=ceiling(paletteLength/2) + 1),
seq(max(all_module_trait_binary_hm_mat)/paletteLength, max(abs(all_module_trait_binary_hm_mat)), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#548FCB", "white", "#B83434"))(length(myBreaks))
my_hm <- Heatmap(all_module_trait_binary_hm_mat,
col = circlize::colorRamp2(myBreaks, myColor),
color_space = "LAB",
row_names_side = "left",
row_dend_side = "right",
left_annotation = row_ha,
border = TRUE)
draw(my_hm)
my_hub_genes <- c(spearman_signif_hub_genes, unique(all_module_trait_binary_hm_dat$hub_gene))
my_scatter_plots <- all_MEs %>%
filter(GeneSymbol %in% my_hub_genes) %>%
mutate(trait = factor(trait, levels = c("pre-vax anti-CSP IgG", "parasitemic events during vax period", "log2FC anti-CSP IgG", "days to first parasitemia"))) %>%
mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
ggplot(., aes(x = trait_value, y = module_eigenvalue)) +
geom_point() +
geom_smooth(method = "lm") +
xlab("trait value") +
ylab("module eigenvalue") +
stat_cor(method = "spearman",
cor.coef.name = "rho") +
theme_bw() +
ggh4x::facet_grid2(GeneSymbol~trait, scales = "free", switch = "y")
beeswarmplot_dat <- orderMEs(net$MEs) %>%
rownames_to_column(var = "subject") %>%
mutate(subject = gsub("\\_0", "", subject)) %>%
right_join(., pData(wgcna_eset) %>%
dplyr::select(-c(site, treat)) %>%
rownames_to_column(var = "subject"),
by = "subject") %>%
pivot_longer(cols = contains("ME"), names_to = "module_label", values_to = "module_eigenvalue") %>%
mutate(module_color = gsub("ME", "", module_label)) %>%
left_join(., all_modules,
by = "module_color") %>%
filter(GeneSymbol %in% my_hub_genes)
library(tidyverse)
library(broom)
beeswarmplot_dat_factor <- beeswarmplot_dat %>%
mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
mutate(protected_3mos = ifelse(`uninfected, 3 months` == "infected", "NP", "P")) %>%
pivot_longer(cols = c("fold-change anti-CSP IgG":"uninfected, 3 months", protected_3mos), names_to = "trait", values_to = "value") %>%
mutate(trait = factor(trait, levels = c("parasitemic during vax period",
"pre-vax anti-CSP IgG",
"uninfected, 3 months",
"protected_3mos",
"fold-change anti-CSP IgG"))) %>%
filter(trait == "protected_3mos")
beeswarmplot_dat_factor_norm_test <- beeswarmplot_dat_factor %>%
group_by(GeneSymbol, trait)%>%
do(tidy(shapiro.test(.$module_eigenvalue))) %>%
ungroup() %>%
dplyr::select(-method) %>%
mutate(is_normal = ifelse(p.value<0.05, "no","yes"))
beeswarmplot_dat_factor_n <- beeswarmplot_dat_factor %>%
group_by(GeneSymbol, trait) %>%
summarise(n = n())
## `summarise()` has grouped output by 'GeneSymbol'. You can override using the
## `.groups` argument.
data_summary <- function(x) {
m <- mean(x, na.rm=TRUE)
sem <-sd(x, na.rm=TRUE)/sqrt(sum(!is.na(x)))
ymin<-m-1.96*sem
ymax<-m+1.96*sem
return(c(y=m,ymin=ymin,ymax=ymax))
}
all_beeswarm_plot <- beeswarmplot_dat_factor %>%
ggplot(., aes(x=value, y=module_eigenvalue)) +
ggbeeswarm::geom_beeswarm(alpha = 0.5, color = "lightblue") +
stat_summary(fun="mean", geom="segment", mapping=aes(xend=..x.. - 0.1, yend=..y..), linewidth = 0.5)+
stat_summary(fun="mean", geom="segment", mapping=aes(xend=..x.. + 0.1, yend=..y..), linewidth = 0.5) +
stat_summary(fun.data=data_summary, geom="errorbar", width=0.1) +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
ylab("module eigenvalue") +
xlab("") +
theme_bw() +
ggh4x::facet_grid2(GeneSymbol~trait, scales = "free", switch = "y") #+
# theme(axis.text = element_text(colour = "black"),
# strip.background = element_blank(),
# axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
# plot.title = element_text(size=8))
ggarrange(my_scatter_plots, all_beeswarm_plot, widths = c(4,1.15))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The dot-dot notation (`..x..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(x)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
This will show Spearman’s rho as color scale.
cor_dat <- my_scatter_plots$data %>%
group_by(GeneSymbol, trait) %>%
summarize(rho = cor.test(module_eigenvalue, trait_value, method = "spearman")$estimate,
p = cor.test(module_eigenvalue, trait_value, method = "spearman")$p.value)
## Warning: There were 48 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `rho = cor.test(module_eigenvalue, trait_value, method =
## "spearman")$estimate`.
## ℹ In group 1: `GeneSymbol = BTLA`, `trait = pre-vax anti-CSP IgG`.
## Caused by warning in `cor.test.default()`:
## ! Cannot compute exact p-value with ties
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 47 remaining warnings.
## `summarise()` has grouped output by 'GeneSymbol'. You can override using the
## `.groups` argument.
hm_cor_mat <- cor_dat %>%
dplyr::select(-p) %>%
pivot_wider(names_from = trait, values_from = rho) %>%
column_to_rownames(var = "GeneSymbol") %>%
as.matrix()
mytopmodules_list <- all_modules %>%
filter(GeneSymbol %in% my_hub_genes) %>%
mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
dplyr::select(GeneSymbol, module_color) %>%
deframe()
clab <- list(
"hub_gene" = mytopmodules_list
)
clab <- rev(clab)
row_ha_df <- all_modules %>%
filter(GeneSymbol %in% my_hub_genes) %>%
dplyr::select(GeneSymbol) %>%
mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
arrange(GeneSymbol) %>%
dplyr::rename(hub_gene = "GeneSymbol")
hm_cor_mat <- hm_cor_mat[levels(row_ha_df$hub_gene),]
row_ha <- rowAnnotation(df = row_ha_df, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
simple_anno_size = unit(0.1, "inches"),
show_legend = FALSE)
paletteLength <- 50
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1*max(abs(hm_cor_mat)), 0, length.out=ceiling(paletteLength/2) + 1),
seq(max(hm_cor_mat)/paletteLength, max(abs(hm_cor_mat)), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#548FCB", "white", "#B83434"))(length(myBreaks))
my_hm <- Heatmap(hm_cor_mat,
col = circlize::colorRamp2(myBreaks, myColor),
color_space = "LAB",
row_names_side = "left",
row_dend_side = "right",
cluster_rows = FALSE,
cluster_columns = FALSE,
left_annotation = row_ha,
border = TRUE)
hm_limma_mat <- p_vs_np_3mos %>%
filter(hub_gene %in% my_hub_genes) %>%
mutate(trait = "protection_3mos") %>%
dplyr::select(hub_gene, trait, logFC) %>%
pivot_wider(names_from = trait, values_from = logFC) %>%
column_to_rownames(var = "hub_gene") %>%
as.matrix(., nrow(.), byrow=TRUE)
mytopmodules_list <- all_modules %>%
filter(GeneSymbol %in% my_hub_genes) %>%
mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
dplyr::select(GeneSymbol, module_color) %>%
deframe()
clab <- list(
"hub_gene" = mytopmodules_list
)
clab <- rev(clab)
row_ha_df <- all_modules %>%
filter(GeneSymbol %in% my_hub_genes) %>%
dplyr::select(GeneSymbol) %>%
mutate(GeneSymbol = factor(GeneSymbol, levels = my_hub_genes)) %>%
arrange(GeneSymbol) %>%
dplyr::rename(hub_gene = "GeneSymbol")
hm_limma_mat <- hm_limma_mat[levels(row_ha_df$hub_gene), 1, drop = FALSE]
row_ha <- rowAnnotation(df = row_ha_df, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
simple_anno_size = unit(0.1, "inches"))
paletteLength <- 50
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1*max(abs(hm_limma_mat)), 0, length.out=ceiling(paletteLength/2) + 1),
seq(max(hm_limma_mat)/paletteLength, max(abs(hm_limma_mat)), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#548FCB", "white", "#B83434"))(length(myBreaks))
my_limma_hm <- Heatmap(hm_limma_mat,
col = circlize::colorRamp2(myBreaks, myColor),
color_space = "LAB",
cluster_rows = FALSE,
row_names_side = "left",
row_dend_side = "right",
#left_annotation = row_ha,
border = TRUE)
my_hm + my_limma_hm
foo <- all_modules %>%
filter(GeneSymbol %in% my_hub_genes)
myColors <- foo$module_color
topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")
mytopmodules <- topmodules[myColors] %>%
as.data.frame() %>%
dplyr::rename(EnsemblID = ".") %>%
rownames_to_column("module_label") %>%
as_tibble() %>%
left_join(., fData(x) %>%
dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
devtools::source_url("https://github.com/jtlovell/limmaDE2/blob/master/R/wgcna2igraph.R?raw=TRUE")
graph <- wgcna2igraph(net = net, WGCNA_matrix, modules2plot = myColors,
colors2plot = myColors,
kME.threshold = 0.5, adjacency.threshold = 0.1,
adj.power = power, verbose = T,
node.size = 1, frame.color = NA, node.color = scales::muted("red"),
edge.alpha = .7, edge.width = 0.5)
## using KME values to find genes with high module membership
## subsetting to modules: brown4, lightblue4, lightgreen, midnightblue, mistyrose, paleturquoise
## culling edges by adjacency
## removing unconnected nodes
## coverting to igraph format
## returning a network with 316 nodes and 8675 edges
hubscores <- hub_score(graph, scale = TRUE, weights = NULL,
options = arpack_defaults)
Network graphs of significant modules containing nodes (red dots) and edges (lines) meeting minimum thresholds. Correlations between nodes in different modules are shown as black edges.
plot(graph)
List number of genes per module
downselected_MEs <- paste0("ME", myColors)
WGCNA_dat_select <- c()
for(i in downselected_MEs){
module.color <- sub("ME","", i)
module.colname <- paste0("kME", i)
WGCNA_dat_select[[i]] <- WGCNA_dat %>%
filter(ModuleColors == module.color) %>%
dplyr::select(GeneSymbol, all_of(module.colname))
}
lapply(WGCNA_dat_select, nrow)
## $MEbrown4
## [1] 93
##
## $MElightblue4
## [1] 47
##
## $MElightgreen
## [1] 136
##
## $MEmidnightblue
## [1] 156
##
## $MEmistyrose
## [1] 21
##
## $MEpaleturquoise
## [1] 110
#closeAllConnections()
source("https://raw.githubusercontent.com/TranLab/kspzv1-systems/main/helper/ApplyORA2Genesets.R")
myuniverse <- unique(fData(x)[fData(x)$EnsemblID %in% colnames(WGCNA_matrix),]$GeneSymbol)
ORA_baseline_bound_df <- c()
minSize <- 20
downselected_MEs
## [1] "MEbrown4" "MElightblue4" "MElightgreen" "MEmidnightblue"
## [5] "MEmistyrose" "MEpaleturquoise"
#Combine similar modules skyblue1, mediumpurple1, thistle3 in to one
WGCNA_dat_select2 <- list()
WGCNA_dat_select2$NKG2E <- WGCNA_dat_select$MElightblue4
WGCNA_dat_select2$LTF <- WGCNA_dat_select$MEmistyrose
WGCNA_dat_select2$BLTA_CD22 <- data.table::rbindlist(list(WGCNA_dat_select$MEpaleturquoise,
WGCNA_dat_select$MElightgreen)) %>%
as.data.frame() %>%
dplyr::rename(kMEMEall = "kMEMEpaleturquoise")
WGCNA_dat_select2$RIOK3_SEC62 <- data.table::rbindlist(list(WGCNA_dat_select$MEmidnightblue,
WGCNA_dat_select$MEbrown4)) %>%
as.data.frame() %>%
dplyr::rename(kMEMEall = "kMEMEmidnightblue")
for(k in names(WGCNA_dat_select2)){
WGCNA_dat_select2[[k]] <- WGCNA_dat_select2[[k]][order(WGCNA_dat_select2[[k]][,2], decreasing = TRUE),]
ORA_baseline_bound_df[[k]] <- ApplyORA2Genesets(genelist = WGCNA_dat_select2[[k]]$GeneSymbol,
geneset = "all",
universe = myuniverse,
output_directory = tempdir(),
filename_prefix = paste0("ORA_Mod_Corr_Protect_3_mos_", k,
"_minSize", minSize),
minSize = minSize)
}
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
NKG2E_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$NKG2E, .id = "module_type") %>%
mutate(module = "lightblue4") %>%
mutate(module_size = nrow(WGCNA_dat_select$MElightblue4)) %>%
mutate(hub_gene = "NKG2E")
LTF_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$LTF, .id = "module_type") %>%
mutate(module = "mistyrose") %>%
mutate(module_size = nrow(WGCNA_dat_select$MEmistyrose)) %>%
mutate(hub_gene = "LTF")
BLTA_CD22_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$BLTA_CD22, .id = "module_type") %>%
mutate(module = "paleturquoise and lightgreen") %>%
mutate(module_size = nrow(WGCNA_dat_select2$BLTA_CD22)) %>%
mutate(hub_gene = "BLTA_CD22")
RIOK3_SEC62_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$RIOK3_SEC62, .id = "module_type") %>%
mutate(module = "midnightblue and brown4") %>%
mutate(module_size = nrow(WGCNA_dat_select2$RIOK3_SEC62)) %>%
mutate(hub_gene = "RIOK3_SEC62")
ORA_baseline_allbound <- bind_rows(NKG2E_ORA_baseline_bound,
LTF_ORA_baseline_bound,
BLTA_CD22_ORA_baseline_bound,
RIOK3_SEC62_ORA_baseline_bound) %>%
mutate(neglogpadj = -log10(padj)) %>%
mutate(pct_overlap = 100*(overlap/module_size)) %>%
dplyr::select(module, module_size, hub_gene, module_type, pathway, overlap,
pct_overlap, size, overlapGenes, pval, padj, neglogpadj)
delta analysis
myModuleTypes <- c("MSigDB_Hallmark_v7.4", "MSigDB_C2_kegg_v7.4", "highBTMs", "BloodGen3Module")
myORAClusterPlotDat <- ORA_baseline_allbound %>%
mutate(pathway = gsub("VS", "v", pathway)) %>%
mutate(pathway = gsub("Vd", "Vδ", pathway)) %>%
mutate(pathway = gsub("gd", "γδ", pathway)) %>%
mutate(pathway = sub(".*?\\_", "", pathway)) %>%
mutate(pathway = fct_reorder(pathway, neglogpadj)) %>%
arrange(desc(neglogpadj))%>%
mutate(TextLabelColor = ifelse(module_type == "BloodGen3Module", scales::muted("red"),
ifelse(module_type == "MSigDB_C2_kegg_v7.4", scales::muted("blue"),
ifelse(module_type == "MSigDB_Hallmark_v7.4", "black","gray")))) %>%
filter(padj < 0.01) %>%
filter(pct_overlap >= 5) %>%
filter(module_type %in% myModuleTypes) %>%
group_by(hub_gene) %>%
arrange(neglogpadj) %>%
slice_tail(n = 4) %>%
ungroup()
addSmallLegend <- function(myPlot, pointSize = 1.5, textSize = 3, spaceLegend = 0.3) {
myPlot +
guides(shape = guide_legend(override.aes = list(size = pointSize)),
color = guide_legend(override.aes = list(size = pointSize))) +
theme(legend.title = element_text(size = textSize),
legend.text = element_text(size = textSize),
legend.key.size = unit(spaceLegend, "lines"))
}
scale_begin <- floor(min(myORAClusterPlotDat$pct_overlap, na.rm = TRUE))
scale_end <- ceiling(max(myORAClusterPlotDat$pct_overlap, na.rm = TRUE))
myArrangedPlot <- myORAClusterPlotDat %>%
mutate(pathway = fct_reorder(pathway, neglogpadj)) %>%
ggplot(., aes(x = neglogpadj, y = pathway, fill = pct_overlap)) +
geom_bar(stat = 'identity') +
geom_vline(xintercept = 2, color = "red", linetype = "dotted") +
scale_fill_distiller(direction = 1, breaks = c(0, 10, 20, 30, 40, 50), limits = c(0,50)) +
ylab("module") +
theme_classic(base_family = "sans", base_size = 14) +
theme(legend.position = "bottom",
plot.margin = unit(c(0,0.5,0,0.5), "cm")) +
facet_wrap(~hub_gene, scales ="free") +
theme(strip.background = element_blank())
myfont <- "Helvetica"
bubble_max_size <- 15
basetextsize <- 10
myORABubblePlot <- myORAClusterPlotDat %>%
mutate(pathway = fct_reorder(pathway, neglogpadj)) %>%
ggplot(., aes(x = hub_gene, y = pathway)) +
geom_point(aes(size=pct_overlap, fill = neglogpadj), alpha = 0.65, shape=21, stroke = 0.25) +
scale_size_area(name = expression(-log[10]~adj.~p~value), max_size = bubble_max_size, breaks = c(1, 5, 10, 20, 40, 60), limits = c(1,75)) +
viridis::scale_fill_viridis(option= "A", begin = 0.25, end = 0.75, alpha = 0.8, direction = -1, name = "% overlap") +
hrbrthemes::theme_ipsum_es(base_family = myfont, base_size = basetextsize) +
scale_x_discrete(limits=rev) +
scale_y_discrete(position = "right") +
ylab("pathway/module") +
xlab("network hub gene(s)") +
theme(legend.position = "bottom",
axis.text.x = element_text(angle = 90, vjust = 1, hjust=0)) +
coord_flip()